***************
* This file analyses the jobseeker click data
* Author: Daniel Kopp
***************

clear
clear matrix
clear mata

set maxvar 8000

import delimited using "data_processed\job_ads_from_api.csv"

drop if job_title== "" // 636 obs deleted
compress ad_hash

* Drop duplicates
cap drop dup
duplicates tag ad_hash, gen(dup)
tab dup 	// 0.41

bys ad_hash: gen n_dup = _n
drop if n_dup==2		// 2,397 (0.21%)
drop n_dup dup

* We drop the few obs where workload is missing
drop if workload_min==.		// 466

* Format von Datumvariablen
foreach i in  created publication_start_date publication_end_date    {
	    g `i'_tmp = date(`i', "YMD")
		format `i'_tmp %tdCCYY-NN-DD
		drop  `i'
		rename  `i'_tmp `i'
}

gen month_abm = mofd(publication_end_date)
format month_abm %tm

drop if month_abm<tm(2020m6) // We only keep job ads that have been on the platform since june 2020, as no clicks have been recorded before (242,192 obs deleted)


****
* Add isco occupations to the job ads:
****

preserve
import delimited using "data_processed\job_ads_from_api_occupation.csv", clear

duplicates drop ad_hash isco, force

sort ad_hash isco
bys ad_hash: gen n_isco = _n
tab n_isco,m		// 96% are unique 

rename isco isco_
	
drop cod_avam 
	
reshape wide isco_ , i(ad_hash) j(n_isco)

count					// 1,153,037
count if isco_1==.		// 17'781
count if isco_2!=.		// 37,342

tempfile api_ad_occup_wide
save `api_ad_occup_wide'
restore

merge m:1 ad_hash using `api_ad_occup_wide'		// 
drop if _merge==2		
drop _merge

replace isco_1=isco_2 if isco_1==.
replace isco_1=isco_3 if isco_1==.

count if isco_1==.					 //	 14,143
count if isco_2!=. & isco_2!=isco_1  // 30,315 (3.3%)

* In the following, we will only work with the first isco code (note that more than 96% of all job ads have only one code anyway)
gen isco4 = isco_1
drop isco_1 isco_2 isco_3

* Gen Isco 3 digit
tostring  isco4, gen(isco4_str)
gen 	isco3_str 	= substr(isco4_str,1,strlen(isco4_str)-1)
destring isco3_str	, gen(isco3)
drop isco4_str isco3_str

* Gen Isco 2 digit
tostring  isco3, gen(isco3_str)
gen 	isco2_str 	= substr(isco3_str,1,strlen(isco3_str)-1)
destring isco2_str	, gen(isco2)
drop isco2_str isco3_str

foreach i in 10 20 30 40 50 60 70 80 90 {
	recode isco2 (`i'=.)
}

tab isco2,m

* Gen Isco 1 digit
tostring  isco2, gen(isco2_str)
gen 	isco1_str 	= substr(isco2_str,1,strlen(isco2_str)-1)
destring isco1_str	, gen(isco1)
drop isco1_str isco2_str

forvalues i = 1/4 {
cap run "Help_files\label_isco08_`i'.do"
label values isco`i' isco08_`i'_lab
}


preserve
use "data_processed\ad_clicks.dta", clear

count					// 10,414,439
count if stes_id!="" 	// 9,180,376    (88% of all clicks on the platform are from registered jobseekers)

distinct user 					// 175,495
distinct user if stes_id!="" 	// 138,535  (79% of the platform users are registered jobseekers)

keep if stes_id!=""		// we keep only clicks from registered job seekers 

gen year_click = year(date)
gen month_click = month(date)
tab month_click if year_click==2020
tab month_click if year_click==2021		

drop if year_click==2021 & month_click==6   // we drop ad clicks from June 2021 as ads created after June 1, 2021 are not part of the job ad (api) sample
											// Hence, the sample is from June 6, 2020 - May 31, 2021
tempfile ad_clicks_temp
save `ad_clicks_temp'
restore

merge 1:m ad_hash using `ad_clicks_temp'	//  385,923   from Master not matched (no clicks)
	
tab month_click if year_click==2020		& _merge==2
tab month_click if year_click==2021		& _merge==2
drop if _merge==2			//   211,497  
							
									
* How many of the clicks stem from registered jobseekers?
count if  date!=.				//  8,095,507 - all clicks of registered jobseekers 

* Gen pensum_cat
gen 	pensum_cat = 0 if workload_min==100		// only full-time
replace pensum_cat = 1 if workload_min<100 & workload_max==100 // full-time or part-time
replace pensum_cat = 2 if workload_max<100 // only part-time

label define pensum 0 "Full-time job" 1 "Full- or part-time job" 2 "Part-time job"
label values pensum_cat pensum


******************************
* Descriptive stats
******************************

distinct ad_hash					// 		910,379 (including job ads without a click)
distinct ad_hash		if date!=.  //		524,456 - this sample consists only of job ads with at least one click
distinct stes_id 		if date!=. 	// 		134,903 different stes
count					if date!=.  // 		8,095,507 views in total
distinct isco2						// 		38 without armed forces ("63, Subsistence farmers, fishers, hunters and gatherers" and "95, Street and related sales and service workers"	are missing, )


*****************
* Share of clicks on full-/part-time ads by detailed FTE
*****************

* Note that the clicks have only been recorded from june 2020 onwards

tab pensum_cat if date!=.				//  57% of all job ad views are full-time job ads, 27.6% are part-time job ads and 15.4% cover both categories

* Detailed part-time categories
gen 	pensum_det_10 = workload_max==100
forvalues i = 0/9 {
		gen 	pensum_det_`i' = 0
		forvalues x = 0/9 {
			replace pensum_det_`i' = 1 if inrange(`i'`x',workload_min,workload_max)		
		}
}


* Save share of clicks on different part-time categories  
preserve 
keep if date!=.
keep ad_hash pensum_det_0 pensum_det_1 pensum_det_2 pensum_det_3 pensum_det_4 pensum_det_5 pensum_det_6 pensum_det_7 pensum_det_8 pensum_det_9  pensum_det_10

bys ad_hash: gen n = _n
egen ad_view_id = group(ad_hash n)
drop ad_hash

drop pensum_det_0

reshape long pensum_det_, i(ad_view_id) j(num_pensum)

collapse pensum_det_, by(num_pensum)
tabstat pensum_det_ , by(num_pensum)

label define num_pensum_lab 0 "0-9" 1 "10-19" 2 "20-29" 3 "30-39" 4 "40-49" 5 "50-59" 6 "60-69" 7 "70-79" 8 "80-89" 9 "90-99" 10 "100" 
label values num_pensum num_pensum_lab

rename pensum_det_ pensum_det_click
save "Misc_files\share_job_ad_clicks_by_FTE_temp_api_sample.dta", replace	
restore


* Save share of clicks on part-time ads by isco 1
tab pensum_cat,m nol

cap gen parttime = pensum_cat==2
cap gen part_or_fulltime = inrange(pensum_cat,1,2)

preserve 
keep isco1 parttime part_or_fulltime

rename parttime parttime_clicks
rename part_or_fulltime part_or_fulltime_clicks

collapse parttime_clicks part_or_fulltime_clicks, by(isco1)
tabstat parttime_clicks part_or_fulltime_clicks, by(isco1)

save "Misc_files\share_job_ad_clicks_on_parttime_by_isco1_api_sample.dta", replace	
restore


****************
* Average number of clicks on a job ad by number of weekly working hours
****************

bys ad_hash: gen number_clicks_total = _N if date!=.
sum number_clicks_total, d
replace number_clicks_total = r(p99) if number_clicks_total>r(p99) & number_clicks_total!=.  // we windsorize at the 99th percentile
replace number_clicks_total = 0 if  number_clicks_total==.

bys ad_hash: gen n = _n
		
gen 	pensum_det_clicks_10 = number_clicks_total if workload_max==100
forvalues i = 0/9 {
		gen 	pensum_det_clicks_`i' = .
		forvalues x = 0/9 {
			replace pensum_det_clicks_`i' = number_clicks_total if inrange(`i'`x',workload_min,workload_max)		
		}
}

count if pensum_det_clicks_0!=. & date!=. & n==1 // only 133 ads

* Including ads with o clicks
preserve 
keep ad_hash pensum_det_clicks_10 pensum_det_clicks_0 pensum_det_clicks_1 pensum_det_clicks_2 pensum_det_clicks_3 pensum_det_clicks_4 pensum_det_clicks_5 pensum_det_clicks_6 pensum_det_clicks_7 pensum_det_clicks_8 pensum_det_clicks_9

collapse pensum_det_clicks_10 pensum_det_clicks_0 pensum_det_clicks_1 pensum_det_clicks_2 pensum_det_clicks_3 pensum_det_clicks_4 pensum_det_clicks_5 pensum_det_clicks_6 pensum_det_clicks_7 pensum_det_clicks_8 pensum_det_clicks_9, by(ad_hash)
drop pensum_det_clicks_0
reshape long pensum_det_clicks_, i(ad_hash) j(num_pensum)

collapse pensum_det_clicks_, by(num_pensum)
tabstat pensum_det_clicks_ , by(num_pensum)

label define num_pensum_lab 1 "10-19" 2 "20-29" 3 "30-39" 4 "40-49" 5 "50-59" 6 "60-69" 7 "70-79" 8 "80-89" 9 "90-99" 10 "100" 
label values num_pensum num_pensum_lab

* Produce table to have numerical estimates
export delimited using "$results_part_time\figure_d10_table.csv" , replace

graph bar (mean) pensum_det_clicks_ , ///
			over(num_pensum, gap(15) label(labsize(small)) ) bar(1,color(gs9) lcolor(white)) ///
			blabel(bar, format(%4.0f)) ylabel(0(10)40, format(%9.0fc) labsize(medsmall)) ytitle(Average # of clicks on a job ad, margin(medsmall))  ///
			b1title(Weekly hours in full-time equivalents FTE, margin(medsmall)) graphregion(color(white)) bgcolor(white)  
graph export "$results_part_time\figure_d10.eps" , as(eps) replace				
restore


*****
* Share of clicks on full-/part-time ads by gender
*****

cap drop _merge
merge m:1 stes_id using "data_processed\stes.dta", keepusing(persnr cod_gender num_vermittlungs_grad dat_anmeld dat_abmeld dat_geburts_datum code_noga_letzt_ag)

gen 	byte num_vermittlungs_grad_byte=num_vermittlungs_grad
replace num_vermittlungs_grad_byte = . if num_vermittlungs_grad_byte==0

gen 	fulltime = 0 if num_vermittlungs_grad_byte<100
replace	fulltime = 1 if num_vermittlungs_grad_byte==100

tab  fulltime ,m


* Gen number of unemployment spells per person prior to 01Jul2018
bys stes_id: gen n_stes = _n
gen 	n_stes1 = 1 if n_stes==1
replace n_stes1 = . if stes_id==""
tab n_stes1,m 


preserve 
keep if n_stes1==1

* Count people who were registered (and de-registered) before 01Mar2017
gen 	persnr_bef01Mar2017 = persnr 
replace persnr_bef01Mar2017 = . if dat_abmeld>=td(01Mar2017)	

bys persnr: egen n_unempl_spells_bef01Mar2017 = count(persnr_bef01Mar2017) 
tab n_unempl_spells_bef01Mar2017 ,m

gen unenmpl_bef01Mar2017 = n_unempl_spells_bef01Mar2017>0 & n_unempl_spells_bef01Mar2017!=. 

* Share of jobseekers registered between 01Mar2017 and 31Dec2017 who were registered at least once in the 1.5 years before
* Note: The difference between full- and part-time jobseekers does not change if we exclude jobseekers younger than 25 at registration
eststo clear
estpost sum  unenmpl_bef01Mar2017 if cod_gender==0 & fulltime==1 & inrange(dat_anmeld,td(01Mar2017),td(31Dec2017)) 			// men full-time
eststo full_men
estimates save "${save_path}\unem_before_full_men" , replace  
estpost sum  unenmpl_bef01Mar2017 if cod_gender==0 & fulltime==0 & inrange(dat_anmeld,td(01Mar2017),td(31Dec2017))			// men part-time
eststo part_men
estimates save "${save_path}\unem_before_part_men" , replace  
* Difference men
estpost ttest unenmpl_bef01Mar2017 if cod_gender==0 & inrange(dat_anmeld,td(01Mar2017),td(31Dec2017)) , by(fulltime)
eststo diffmen 
estimates save "${save_path}\unem_before_diff_men" , replace  
estpost sum   unenmpl_bef01Mar2017 if cod_gender==1 & fulltime==1 & inrange(dat_anmeld,td(01Mar2017),td(31Dec2017)) 		// women full-time
eststo full_women
estimates save "${save_path}\unem_before_full_women" , replace  
estpost sum   unenmpl_bef01Mar2017 if cod_gender==1 & fulltime==0 & inrange(dat_anmeld,td(01Mar2017),td(31Dec2017))			// women part-time
eststo part_women
estimates save "${save_path}\unem_before_part_women" , replace  
* Difference women
estpost ttest unenmpl_bef01Mar2017 if cod_gender==1 & inrange(dat_anmeld,td(01Mar2017),td(31Dec2017)) , by(fulltime)
eststo diffwomen 
estimates save "${save_path}\unem_before_diff_women" , replace  

label var unenmpl_bef01Mar2017 "\quad Unemployed before March 1, 2017"

* Show the table
esttab full_women part_women diffwomen full_men part_men diffmen, cells("mean(pattern(1 1 0 1 1 0) fmt(%9.3gc)) b(star pattern(0 0 1 0 0 1) fmt(%9.3gc))")  label  collabels(none) varlabels(`e(labels)')  ///
		mgroups("\textbf{Women}" "\textbf{Men}",pattern(1 0 0 1 0 0)  span prefix(\multicolumn{@span}{c}{) suffix(})) ///
		mtitle("fulltime" "parttime" "difference" "fulltime" "parttime" "difference") ///
		title(Share jobseekers unemployed before March 1, 2017) 
restore


* We remove job seekers from the using dataset who deregistered before June 2020 (when the click dataset starts)
drop if _merge==2 & dat_abmeld<td(01-06-2020)		


*******
* Share of clicks on full-/part-time ads by gender
*******

tab pensum_cat if date!=. & cod_gender==0	// 69% of the clicks from men are on full-time job ads - this is much lower than the share of men working full-time, 17% of clicks from men are on part-time job ads and 14% are on job ads covering part- and full-time positions

tab pensum_cat if date!=. & cod_gender==1	// 45% of the clicks from women are on full-time job ads - this is more or less equivalent to the share of women working full-time, 38% of clicks from women are on part-time job ads and 17% are on job ads covering part- and full-time positions


*******
* Share of jobseekers appearing in the click dataset who state a full-/part-time preference
*******

*For men and women separately:
forvalues i = 0/1 {
	preserve 
	keep if date!=.
	keep if cod_gender==`i'
	keep stes_id num_vermittlungs_grad_byte 
	drop if num_vermittlungs_grad_byte==.

	gen pensum_det_10 = num_vermittlungs_grad_byte==100
	gen pensum_det_9  = inrange(num_vermittlungs_grad_byte,90,99)
	gen pensum_det_8  = inrange(num_vermittlungs_grad_byte,80,89)
	gen pensum_det_7  = inrange(num_vermittlungs_grad_byte,70,79)
	gen pensum_det_6  = inrange(num_vermittlungs_grad_byte,60,69)
	gen pensum_det_5  = inrange(num_vermittlungs_grad_byte,50,59)
	gen pensum_det_4  = inrange(num_vermittlungs_grad_byte,40,49)
	gen pensum_det_3  = inrange(num_vermittlungs_grad_byte,30,39)
	gen pensum_det_2  = inrange(num_vermittlungs_grad_byte,20,29)
	gen pensum_det_1  = inrange(num_vermittlungs_grad_byte,10,19)
	gen pensum_det_0  = inrange(num_vermittlungs_grad_byte,0,9)

	collapse pensum_det_0 pensum_det_1 pensum_det_2 pensum_det_3 pensum_det_4 pensum_det_5 pensum_det_6 pensum_det_7 pensum_det_8 pensum_det_9  pensum_det_10, by(stes_id)
	drop pensum_det_0
	reshape long pensum_det_, i(stes_id) j(num_pensum)

	collapse pensum_det_ , by(num_pensum)
	tabstat pensum_det_ , by(num_pensum)

	tempfile share_stes_gender`i' 
	save `share_stes_gender`i''
	restore

	preserve 
	keep if date!=.
	keep if cod_gender==`i'
	keep ad_hash pensum_det_0 pensum_det_1 pensum_det_2 pensum_det_3 pensum_det_4 pensum_det_5 pensum_det_6 pensum_det_7 pensum_det_8 pensum_det_9  pensum_det_10

	bys ad_hash: gen n = _n
	egen ad_view_id = group(ad_hash n)
	drop ad_hash

	drop pensum_det_0
	
	reshape long pensum_det_, i(ad_view_id) j(num_pensum)

	collapse pensum_det_, by(num_pensum)
	tabstat pensum_det_ , by(num_pensum)

	label define num_pensum_lab 0 "0-9" 1 "10-19" 2 "20-29" 3 "30-39" 4 "40-49" 5 "50-59" 6 "60-69" 7 "70-79" 8 "80-89" 9 "90-99" 10 "100" 
	label values num_pensum num_pensum_lab

	rename pensum_det_ pensum_det_click

	merge 1:1 num_pensum using  `share_stes_gender`i''
	
	replace pensum_det_click = pensum_det_click*100
	replace pensum_det_ = pensum_det_*100

	* Produce table to have numerical estimates
	export delimited using "$results_part_time\\figure_1_`i'_table.csv" , replace
	
	graph bar (mean) pensum_det_click pensum_det_  , ///
			over(num_pensum, gap(0.2) label(labsize(small)) ) bar(1,color(gs10) lcolor(white)) bar(2,color(black) lcolor(white))  ///
			blabel(bar, format(%2.0f) size(vsmall))  ylabel(0(20)100, format(%9.0fc) labsize(medsmall))   ///
			b1title(Weekly hours in full-time equivalents FTE, margin(medsmall)) graphregion(color(white)) bgcolor(white)  legend(label(1 "Clicks") label(2 "Stated preferences"))
	graph export "$results_part_time\figure_1_`i'.eps" , as(eps) replace				
	restore
}


***
* What is the share of registered male and female jobseekers with a full-/part-time preference who use Job-Room?
* And what is the average number of clicks per month by gender and full-/part-time preference?
***

gen months_since_reg = (date-dat_anmeld)/30

gen search_jobroom = date!=. 

gen 	age_at_registration = (dat_anmeld-dat_geburts_datum)/365
replace age_at_registration = . if age_at_registration<18

preserve
keep if dat_anmeld>=td(01-06-2020)		// we keep only jobseekers who registered after june 2020
keep if stes_id!=""
keep if months_since_reg <1 | months_since_reg==.  // we count only clicks within the first month after registration

gen temp_clicks = 1 if date!=. 		

collapse fulltime cod_gender search_jobroom age_at_registration code_noga_letzt_ag (count) clicks_stes = temp_clicks, by(stes_id)

gen 	click_stes_all = clicks_stes 
replace clicks_stes = . if search_jobroom==0

eststo clear
 estpost sum search_jobroom clicks_stes click_stes_all if cod_gender==0 & fulltime==1, d // male
eststo full_male
estimates save "${save_path}\jobroom_usage_full_men" , replace  
 estpost sum search_jobroom clicks_stes click_stes_all if cod_gender==0 & fulltime==0, d // male
eststo part_male
estimates save "${save_path}\jobroom_usage_part_men" , replace  
* Difference men
estpost ttest search_jobroom clicks_stes click_stes_all if cod_gender==0, by(fulltime) 
eststo diffmen 
estimates save "${save_path}\jobroom_usage_diff_men" , replace  
quietly estpost sum search_jobroom clicks_stes click_stes_all if cod_gender==1 & fulltime==1, d // female
eststo full_female
estimates save "${save_path}\jobroom_usage_full_women" , replace  
quietly estpost sum search_jobroom clicks_stes click_stes_all if cod_gender==1 & fulltime==0, d // female
eststo part_female
estimates save "${save_path}\jobroom_usage_part_women" , replace  
* Difference women
estpost ttest search_jobroom clicks_stes click_stes_all if cod_gender==1, by(fulltime)
eststo diffwomen 
estimates save "${save_path}\jobroom_usage_diff_women" , replace  

label var search_jobroom "\quad Share active on Job-Room"
label var clicks_stes "\quad  \# ad views in 1st month (if active)"
label var click_stes_all "\quad  \# ad views in 1st month"

gen  title2_long = 1
label var title2_long "\textbf{B. Jobseeker search behavior}"
clonevar title2 = title2_long

esttab  full_female part_female diffwomen full_male part_male diffmen, cells("mean(pattern(1 1 0 1 1 0) fmt(%9.3gc)) b(star pattern(0 0 1 0 0 1) fmt(%9.3gc))")  label  collabels(none) varlabels(`e(labels)')  ///
		mgroups("\textbf{Women}" "\textbf{Men}",pattern(1 0 0 1 0 0)  span prefix(\multicolumn{@span}{c}{) suffix(})) ///
		mtitle("fulltime" "parttime" "difference" "fulltime" "parttime" "difference") ///
		title(Share jobseekers and number of searches on Job-Room) 
restore	
	
		
